This document presents the subset of figures used for the paper about monitoring.
library(dplyr)
library(tidyr)
library(ggplot2)
library(ggmap)
library(scatterpie)
library(rgdal)
library(multcompView)
library(car)
library(ggpmisc)
library(chisq.posthoc.test)
library(vcd)
Sampling data correspond to the data collected with kobo and previously cleaned with the script 1_preprocesamiento_datos_kobo.Rmd.
# Read the cleaned kobo exports (tab-delimited text with a header row)
muestreo_tidy <- read.delim("../data/kobo/muestreo_dic2020_tidy.txt")
parcelas_tidy <- read.delim("../data/kobo/parcelas_dic2020_tidy.txt")
# Reshape the plot-level table to long format: the per-category count
# columns (healthy through worm) become one health-status variable plus a
# tree-count variable
parcelas_long <- parcelas_tidy %>%
  pivot_longer(
    cols = healthy:worm,
    names_to = "tree_health_simplified",
    values_to = "n_trees"
  )
Data analyzed here correspond only to the trees that were approved during the validation by manually reviewing the photographs in kobotoolbox. Of a total of 1778 trees sampled, 1765 were approved in the validation.
# Keep only trees whose record was approved during validation
muestreo_tidy <- muestreo_tidy %>%
  filter(X_validation_status == "validation_status_approved")
Color palettes:
# Colour palette and legend order shared by all health-status plots.
# The three vectors are parallel: position i of my_cols is the colour for
# category desired_order[i], which is displayed as desired_names[i].
my_cols <- c(
  "darkgreen",
  "darkred",
  "orangered1",
  "cadetblue",
  "tan",
  "beige",
  # "burlywood4",
  "coral",
  "aquamarine3",
  "gray70",
  "black"
)
# Category values as they are spelled in the data
desired_order <- c(
  "healthy",
  "ozone",
  "ozone_and_other",
  "others_combined",
  "drougth",
  "fungi",
  # "insect",
  "worm",
  "acid_rain",
  "other",
  "dead"
)
# Legend labels, in the same order as desired_order
desired_names <- c(
  "healthy",
  "ozone",
  "ozone and other",
  "others combined",
  "drougth",
  "fungi",
  # "insect",
  "worm",
  "acid rain",
  "other",
  "dead"
)
# Palette and class order for the ozone damage percentage plots
my_cols2 <- c(
  "darkgreen", "gold2", "chocolate1",
  "orangered", "red4", "darkorchid4"
)
desired_order_percentage <- c(
  "0%", "less than 10%", "10 to 40%",
  "40 to 50%", "50 to 70%", "more than 70%"
)
Multiplot fun:
# Multiple plot function
#
# Draws several plots on one page, arranged on a grid.
#
# ggplot objects can be passed in ..., or to plotlist (as a list of ggplot objects)
# - plotlist: Optional list of plots, appended after the ones given in ...
# - file: Not used by the function body; kept so existing calls stay valid
# - cols: Number of columns in layout
# - layout: A matrix specifying the layout. If present, 'cols' is ignored.
#
# If the layout is something like matrix(c(1,2,3,3), nrow=2, byrow=TRUE),
# then plot 1 will go in the upper left, 2 will go in the upper right, and
# 3 will go all the way across the bottom.
#
# When no plot is supplied the function does nothing and returns NULL
# invisibly (it used to fail while indexing the empty plot list).
#
multiplot <- function(..., plotlist=NULL, file, cols=1, layout=NULL) {
  library(grid)
  # Make a list from the ... arguments and plotlist
  plots <- c(list(...), plotlist)
  numPlots <- length(plots)
  # Nothing to draw: leave the current device untouched
  if (numPlots == 0) {
    return(invisible(NULL))
  }
  # If layout is NULL, then use 'cols' to determine layout
  if (is.null(layout)) {
    # Make the panel
    # ncol: Number of columns of plots
    # nrow: Number of rows needed, calculated from # of cols
    layout <- matrix(seq(1, cols * ceiling(numPlots/cols)),
                     ncol = cols, nrow = ceiling(numPlots/cols))
  }
  if (numPlots == 1) {
    print(plots[[1]])
  } else {
    # Set up the page
    grid.newpage()
    pushViewport(viewport(layout = grid.layout(nrow(layout), ncol(layout))))
    # Make each plot, in the correct location
    for (i in seq_len(numPlots)) {
      # Get the i,j matrix positions of the regions that contain this subplot
      matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
      print(plots[[i]], vp = viewport(layout.pos.row = matchidx$row,
                                      layout.pos.col = matchidx$col))
    }
  }
}
Configure google api for maps:
# code adapted from https://rgraphgallery.blogspot.com/2013/04/rg-plot-pie-over-g0ogle-map.html
## configure google api
# You first need to register your api key in https://cloud.google.com/maps-platform/#get-started and follow instructions. The geocoding API is a free service, but you nevertheless need to associate a credit card with the account. Please note that the Google Maps API is not a free service. There is a free allowance of 40,000 calls to the geocoding API per month, and beyond that calls are $0.005 each.
# after you obtain your api key, save it in /scripts/api_key.api (not included in this repo for obvious reasons).
# if you get the following error when running get_map():
#"Error in aperm.default(map, c(2, 1, 3)) :
# invalid first argument, must be an array "
# check this troubleshooting: https://rgraphgallery.blogspot.com/2013/04/rg-plot-pie-over-google-map.html
## load and register api
# Read only the first line and strip surrounding whitespace, so that a
# trailing newline or blank line in the key file does not end up in the key.
api <- trimws(readLines("api_key.api", n = 1L, warn = FALSE))
# Fail early with a clear message instead of registering an empty key
if (length(api) != 1L || !nzchar(api)) {
  stop("api_key.api does not contain an API key on its first line", call. = FALSE)
}
register_google(key = api)
Map and monitoring figures presented in the paper:
# Read the CDMX boundary shapefile
CDMX <- readOGR("../data/spatial", "CDMX")
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/veronicareyesgalindo/Documents/GitHub/monitoreo-oyameles/data/spatial", layer: "CDMX"
## with 1 features
## It has 8 fields
# Convert the spatial object to a data frame that ggplot2 can draw
CDMX <- fortify(CDMX)
# Read the PNDL boundary shapefile
PNDL <- readOGR("../data/spatial", "Desierto_Leones_Geo_ITRF08")
## Warning in OGRSpatialRef(dsn, layer, morphFromESRI = morphFromESRI, dumpSRS =
## dumpSRS, : Discarded datum International_Terrestrial_Reference_Frame_2008 in
## Proj4 definition: +proj=longlat +ellps=GRS80 +no_defs
## OGR data source with driver: ESRI Shapefile
## Source: "/Users/veronicareyesgalindo/Documents/GitHub/monitoreo-oyameles/data/spatial", layer: "Desierto_Leones_Geo_ITRF08"
## with 1 features
## It has 14 fields
# Same conversion for the PNDL polygon
PNDL <- fortify(PNDL)
# Panel a): overview map with the CDMX (black) and PNDL (red) outlines.
# get background map
sat_map <- get_map(location = c(lon = -99.133549, lat = 19.3), zoom = 10,
                   maptype = 'terrain-background', source = "google")
## plot
# The hand-made legend (two open squares plus their labels) is drawn with
# annotate(): the coordinates are constants, and putting constants inside
# aes() makes ggplot2 recycle them over the rows of the inherited basemap
# data, which overplots each marker and label several times.
p_a <- ggmap(sat_map) +
  geom_polygon(data = CDMX,
               aes(x = long, y = lat, group = group),
               color = "black", fill = NA, size = 1.5) +
  geom_polygon(data = PNDL,
               aes(x = long, y = lat, group = group),
               color = "red", fill = NA, size = 1.5) +
  annotate("point", x = -98.95, y = 19.6,
           shape = 0, stroke = 2, size = 5, colour = "black") +
  annotate("point", x = -98.95, y = 19.55,
           shape = 0, stroke = 2, size = 5, colour = "red") +
  annotate("text", label = "CDMX", x = -98.87, y = 19.6,
           colour = "Black", fontface = "bold", size = 5) +
  annotate("text", label = "PNDL", x = -98.87, y = 19.55,
           colour = "Black", fontface = "bold", size = 5) +
  theme(text = element_text(size = 20)) +
  ggtitle("a)")
# Panel b): satellite map of the PNDL outline with nearby towns.
# get background map
sat_map <- get_map(location = c(lon = -99.30, lat = 19.31), zoom = 13,
                   maptype = 'satellite', source = "google")
## add towns names
towns <- data.frame(nombre = c("San Bartolo Ameyalco",
                               "Santa Rosa Xochiac",
                               "San Mateo Tlaltenango"),
                    long = c(-99.270, -99.29, -99.276),
                    lat = c(19.333, 19.325, 19.346))
## plot
p_b <- ggmap(sat_map) +
  geom_polygon(data = PNDL,
               aes(x = long, y = lat, group = group),
               color = "red", fill = NA, size = 1.5) +
  geom_point(data = towns, aes(x = long, y = lat), colour = "red", size = 1.5) +
  geom_text(data = towns, aes(label = nombre, x = long, y = lat),
            color = "white", fontface = "bold",
            size = 5, nudge_y = 0.003) +
  # Landmarks: Cruz de Coloxtitla is drawn as "X" and the Convento as "C".
  # annotate() is used because the coordinates are constants; constants
  # inside aes() are recycled over the inherited basemap data and the
  # label gets drawn several times on top of itself.
  annotate("text", label = "X", x = -99.3014, y = 19.286068,
           colour = "white", fontface = "bold", size = 4) +
  annotate("text", label = "C", x = -99.31, y = 19.3133,
           colour = "white", fontface = "bold", size = 4) +
  theme(text = element_text(size = 20)) +
  ggtitle("b)")
## plot map
# Satellite basemap centred on the sampled plots
sat_map <- get_map(
  location = c(lon = -99.3060, lat = 19.2909),
  zoom = 14,
  maptype = 'satellite',
  source = "google"
)
# Panel c): one red point per sampled plot, labelled with the plot id
p_c <- ggmap(sat_map) +
  geom_point(
    data = parcelas_tidy,
    aes(x = X_coordinates_longitude, y = X_coordinates_latitude),
    color = "red"
  ) +
  geom_text(
    data = parcelas_tidy,
    aes(
      x = X_coordinates_longitude,
      y = X_coordinates_latitude,
      label = plot
    ),
    color = "white",
    check_overlap = TRUE,
    hjust = 0, vjust = 1, nudge_x = 0.0005,
    size = 5
  ) +
  theme(text = element_text(size = 20)) +
  ggtitle("c)")
The following figure shows the total number of trees sampled in each 10x10 m plot, and how many of these are under some category of damage:
# Panel d): stacked bars with the number of trees per plot, filled by
# health status (geom_col() is geom_bar(stat = "identity"))
p_d <- ggplot(
  parcelas_long,
  aes(x = plot, y = n_trees, fill = tree_health_simplified)
) +
  geom_col() +
  scale_fill_manual(
    values = my_cols,
    breaks = desired_order,
    labels = desired_names,
    name = "Health status"
  ) +
  theme_bw() +
  labs(x = "Plots", y = "Number of trees") +
  theme(text = element_text(size = 20)) +
  ggtitle("d)")
# Arrange the four panels on a two-column page
multiplot(p_a, p_c, p_b, p_d, cols = 2)
H0: el estado de salud del árbol es independiente de su condición de reforestación. H1: los árboles dañados por ozono dependen de si fueron reforestados.
# Keep the health, reforestation and exposition columns, restricted to
# healthy trees and trees with ozone damage
cont_tab <- muestreo_tidy %>%
  select(contains(c("tree_health_simplified", "reforested", "tree_exposition"))) %>%
  filter(tree_health_simplified %in% c("healthy", "ozone", "ozone_and_other"))
# Three-way contingency table of the selected columns
table(cont_tab)
## , , tree_exposition = cover
##
## reforested
## tree_health_simplified no yes
## healthy 247 75
## ozone 42 20
## ozone_and_other 278 41
##
## , , tree_exposition = exposed
##
## reforested
## tree_health_simplified no yes
## healthy 136 31
## ozone 52 11
## ozone_and_other 157 23
# Contingency table of health status (rows) by reforestation (columns).
# table() on factors with explicit levels fixes the row/column order
# (healthy, ozone, ozone and other x yes, no) and names both dimensions,
# so the table can be passed directly to the chi-squared and mosaic functions.
tab_reforested <- table(
  factor(cont_tab$tree_health_simplified,
         levels = c("healthy", "ozone", "ozone_and_other"),
         labels = c("healthy", "ozone", "ozone and other")),
  factor(cont_tab$reforested, levels = c("yes", "no")),
  dnn = c("healthy status", "reforested")
)
# Barplot
p_3a <- ggplot(cont_tab, aes(reforested, ..count..)) +
  geom_bar(aes(fill = tree_health_simplified), position = "dodge") +
  scale_fill_manual(name = "Health status",
                    values = c("healthy" = "darkgreen", "ozone" = "darkred", "ozone_and_other" = "orangered1"),
                    labels = c("healthy", "ozone", "ozone and other")) +
  theme_bw() + ggtitle("a)") + theme(legend.title.align = 0.5) + theme(text = element_text(size = 20)) +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold")) +
  labs(y = "Number of trees", x = "Reforested")
# Mosaic Plot with vcd library
# NOTE(review): p_3b stores the value returned by vcd::mosaic(), not a
# ggplot object — confirm it can be drawn by multiplot() as intended.
p_3b <- mosaic(tab_reforested, shade = TRUE, legend = TRUE,
               labeling_args = list(rot_labels = c(bottom = 90, top = 0), gp_labels = (gpar(fontsize = 12))))
# Trials of alternative mosaic displays
## OPTION 1
library("graphics")
# The title used to be "housetasks", left over from a tutorial example
mosaicplot(tab_reforested, shade = TRUE, las = 2,
           main = "Health status by reforestation")
## OPTION 2
# install.packages("vcd")
library("vcd")
# plot just a subset of the table (first rows, at most five)
assoc(head(tab_reforested, 5), shade = TRUE, las = 2)
# Chi2
chisq <- chisq.test(tab_reforested)
chisq
##
## Pearson's Chi-squared test
##
## data: tab_reforested
## X-squared = 17.399, df = 2, p-value = 0.0001666
# Observed cell counts
print(chisq$observed)
## reforested
## healthy status yes no
## healthy 106 383
## ozone 31 94
## ozone and other 64 435
# Cell counts expected under independence
round(chisq$expected, digits = 2)
## reforested
## healthy status yes no
## healthy 88.31 400.69
## ozone 22.57 102.43
## ozone and other 90.12 408.88
# Pearson residuals (chisq$residuals; the standardized residuals are a
# separate component, chisq$stdres)
round(chisq$residuals, digits = 3)
## reforested
## healthy status yes no
## healthy 1.882 -0.884
## ozone 1.773 -0.833
## ozone and other -2.751 1.292
# Visualize the Pearson residuals
library(corrplot)
corrplot(chisq$residuals, is.cor = FALSE)
# Contribution of each cell to the chi-squared statistic, in percent
contrib <- (100 * chisq$residuals^2) / chisq$statistic
round(contrib, digits = 3)
## reforested
## healthy status yes no
## healthy 20.366 4.489
## ozone 18.075 3.984
## ozone and other 43.499 9.587
# Visualize the contributions
corrplot(contrib, is.cor = FALSE)
# Contingency table of health status (rows) by canopy cover (columns).
# tree_exposition "cover" is reported as covered = "yes" and "exposed" as
# covered = "no". table() on factors with explicit levels fixes the order
# and names both dimensions for the chi-squared and mosaic functions.
tab_covered <- table(
  factor(cont_tab$tree_health_simplified,
         levels = c("healthy", "ozone", "ozone_and_other"),
         labels = c("healthy", "ozone", "ozone and other")),
  factor(cont_tab$tree_exposition,
         levels = c("cover", "exposed"),
         labels = c("yes", "no")),
  dnn = c("healthy status", "covered")
)
# Barplot
p_3c <- ggplot(cont_tab, aes(tree_exposition, ..count..)) +
  geom_bar(aes(fill = tree_health_simplified), position = "dodge") +
  scale_fill_manual(name = "Health status",
                    values = c("healthy" = "darkgreen", "ozone" = "darkred", "ozone_and_other" = "orangered1"),
                    labels = c("healthy", "ozone", "ozone and other")) +
  theme_bw() + ggtitle("c)") + theme(legend.title.align = 0.5) + theme(text = element_text(size = 20)) +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold")) +
  labs(y = "Number of trees", x = "Tree exposition")
# Mosaic Plot with vcd library
# NOTE(review): p_3d stores the value returned by vcd::mosaic(), not a
# ggplot object — confirm it can be drawn by multiplot() as intended.
p_3d <- mosaic(tab_covered, shade = TRUE, legend = TRUE,
               labeling_args = list(rot_labels = c(bottom = 90, top = 0), gp_labels = (gpar(fontsize = 12))))
# Trials of alternative mosaic displays
## OPTION 1
# The title used to be "housetasks", left over from a tutorial example
mosaicplot(tab_covered, shade = TRUE, las = 2,
           main = "Health status by canopy cover")
## OPTION 2
# plot just a subset of the table (first rows, at most five)
assoc(head(tab_covered, 5), shade = TRUE, las = 2)
# Chi2
chisq <- chisq.test(tab_covered)
chisq
##
## Pearson's Chi-squared test
##
## data: tab_covered
## X-squared = 11.524, df = 2, p-value = 0.003145
# Observed cell counts
print(chisq$observed)
## covered
## healthy status yes no
## healthy 322 167
## ozone 62 63
## ozone and other 319 180
# Cell counts expected under independence
round(chisq$expected, digits = 2)
## covered
## healthy status yes no
## healthy 308.87 180.13
## ozone 78.95 46.05
## ozone and other 315.18 183.82
# Pearson residuals (chisq$residuals; the standardized residuals are a
# separate component, chisq$stdres)
round(chisq$residuals, digits = 3)
## covered
## healthy status yes no
## healthy 0.747 -0.979
## ozone -1.908 2.498
## ozone and other 0.215 -0.282
# Visualize the Pearson residuals
corrplot(chisq$residuals, is.cor = FALSE)
# Contribution of each cell to the chi-squared statistic, in percent
contrib <- (100 * chisq$residuals^2) / chisq$statistic
round(contrib, digits = 3)
## covered
## healthy status yes no
## healthy 4.847 8.311
## ozone 31.589 54.163
## ozone and other 0.401 0.688
# Visualize the contributions
corrplot(contrib, is.cor = FALSE)
# Combine the bar plots and mosaic objects on one two-column page.
# NOTE(review): the output captured right after this call shows the two
# contingency tables printed as text, which suggests p_3b and p_3d were
# printed rather than drawn — confirm the mosaics appear in the figure.
multiplot(plotlist = list(p_3a, p_3c, p_3b, p_3d), cols = 2)
## reforested yes no
## healthy status
## healthy 106 383
## ozone 31 94
## ozone and other 64 435
## covered yes no
## healthy status
## healthy 322 167
## ozone 62 63
## ozone and other 319 180
# Base plot: trees lower than 15 (tree_heigth) with at least one node,
# using the shared health-status fill scale
p <- muestreo_tidy %>%
  filter(tree_heigth < 15, tree_nodes > 0) %>%
  ggplot() +
  scale_fill_manual(
    values = my_cols,
    breaks = desired_order,
    labels = desired_names,
    name = "Health status"
  ) +
  theme_bw()
# Panel a): histogram of node counts (used as tree age) by health status
p4_a <- p +
  geom_histogram(aes(x = tree_nodes, fill = tree_health_simplified)) +
  labs(x = "Tree age (years)", y = "Number of trees") +
  theme(text = element_text(size = 20)) +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold")) +
  ggtitle("a)")
p4_a
## base data
# Identify healthy trees and assign them an ozone damage class.
# cond_PO stands for "condition, Percentage damage by Ozone".
# as_tibble() replaces the deprecated as_data_frame().
cond_PO <- as_tibble(muestreo_tidy)
# Assign 0% ozone damage to healthy trees
# NOTE(review): this tests the `tree_health` column while the rest of the
# script uses `tree_health_simplified` — confirm this is the intended column.
cond_PO$ozone_damage_percentage <- ifelse(cond_PO$tree_health == "healthy", "0%", cond_PO$ozone_damage_percentage)
# Keep only rows whose damage class is one of the six known categories
condition_PO <- cond_PO %>%
  filter(ozone_damage_percentage %in% desired_order_percentage)
# Use the ordinal class order (0% up to more than 70%) for the factor levels
# instead of the alphabetical order produced by as.factor(), so bars, boxes,
# colours and legend entries all follow increasing damage
condition_PO$ozone_damage_percentage <- factor(condition_PO$ozone_damage_percentage,
                                               levels = desired_order_percentage)
# Plot
p_od <- condition_PO %>%
  filter(!is.na(ozone_damage_percentage)) %>%
  ggplot() +
  scale_fill_manual(values = my_cols2,
                    breaks = desired_order_percentage,
                    labels = c("0%", "less 10%", "10 to 40%", "40 to 50%",
                               "50 to 70%", "more 70%"),
                    name = "Ozone damage\n per tree") +
  theme_bw() + theme(text = element_text(size = 20))
# Panel b): number of trees per node count (used as tree age), stacked by
# ozone damage class
p4_b <- p_od +
  geom_bar(aes(x = tree_nodes,
               fill = ozone_damage_percentage)) +
  labs(x = "Tree age (years)", y = "Number of trees") +
  theme(legend.title.align = 0.5) +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold")) +
  ggtitle("b)")
p4_b
## Warning: Removed 147 rows containing non-finite values (stat_count).
# Filter by damage category: healthy trees and trees with ozone damage
condition_HOO <- muestreo_tidy %>%
  filter(tree_health_simplified %in% c("healthy", "ozone", "ozone_and_other"))
condition_HOO$tree_health_simplified <- as.factor(condition_HOO$tree_health_simplified)
# Data distribution
# The variable plotted is the number of nodes for each health category.
# Node counts are discrete, so differences between groups are examined with
# the tests that follow this figure.
# The results can be shown as boxplots.
# Panel c): `breaks = desired_order` ties each colour and label to its
# category by name, as the fill scales of the other figures do, instead of
# relying on the first three entries lining up with the factor levels.
p4_c <- condition_HOO %>%
  ggplot(aes(y = tree_nodes, x = tree_health_simplified)) +
  geom_boxplot(color = "grey", notch = FALSE) +
  scale_color_manual(values = my_cols, breaks = desired_order,
                     labels = desired_names,
                     name = "Health status") +
  geom_point(position = "jitter", aes(color = tree_health_simplified), alpha = 0.5, size = 2.5) +
  xlab("") + ylab("Tree age (years)") +
  theme_bw() +
  ggtitle("c)") +
  theme(text = element_text(size = 20), axis.text.x = element_blank()) +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold"))
p4_c
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing missing values (geom_point).
# Statistics: group size, mean and standard deviation of the node count
# for each health category
condition_HOO %>%
  group_by(tree_health_simplified) %>%
  summarise(
    count = n(),
    mean = mean(tree_nodes, na.rm = TRUE),
    sd = sd(tree_nodes, na.rm = TRUE)
  )
# One-way ANOVA on the square root of the node count; its residuals are
# used below to check normality
a <- aov(sqrt(tree_nodes) ~ tree_health_simplified, data = condition_HOO)
# Summary of the analysis
summary(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## tree_health_simplified 2 111.7 55.86 171 <2e-16 ***
## Residuals 963 314.5 0.33
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 147 observations deleted due to missingness
# We reject the null hypothesis of equal means: the mean number of nodes
# differs between categories.
# Normality: a p-value below 0.05 means the residuals are not normal.
# Extract the residuals
aov_residuals <- residuals(object = a)
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals)
##
## Shapiro-Wilk normality test
##
## data: aov_residuals
## W = 0.98232, p-value = 1.98e-09
# Proceed with a Kruskal-Wallis test.
# Homogeneity of variances is required; it is checked with Levene's test.
# This call used to be repeated, unchanged, after the Kruskal-Wallis test;
# it now runs once.
# NOTE(review): the original note on the repeated call read "y sí hay :(" —
# confirm from the Levene output what it refers to.
leveneTest(sqrt(tree_nodes) ~ tree_health_simplified, data = condition_HOO, center = "median")
kruskal.test(sqrt(tree_nodes) ~ tree_health_simplified, data = condition_HOO)
##
## Kruskal-Wallis rank sum test
##
## data: sqrt(tree_nodes) by tree_health_simplified
## Kruskal-Wallis chi-squared = 265.57, df = 2, p-value < 2.2e-16
# Post hoc: which groups differ (Holm-adjusted pairwise Wilcoxon tests)
pairwise.wilcox.test(x = sqrt(condition_HOO$tree_nodes), g = condition_HOO$tree_health_simplified, p.adjust.method = "holm")
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: sqrt(condition_HOO$tree_nodes) and condition_HOO$tree_health_simplified
##
## healthy ozone
## ozone <2e-16 -
## ozone_and_other <2e-16 0.012
##
## P value adjustment method: holm
# Plot
# Panel d): node count (used as tree age) per ozone damage class.
# The colour scale names each colour by its class and sets `breaks`, so
# every legend label and colour is matched to its class explicitly. Without
# `breaks` the labels and colours were applied in factor-level order, which
# mislabelled the legend when the levels were sorted alphabetically.
p4_d <- condition_PO %>%
  filter(!is.na(ozone_damage_percentage)) %>%
  ggplot(aes(y = tree_nodes, x = ozone_damage_percentage)) +
  geom_boxplot(color = "grey", notch = FALSE) +
  scale_color_manual(values = setNames(my_cols2, desired_order_percentage),
                     breaks = desired_order_percentage,
                     labels = c("0%", "less 10%", "10 to 40%", "40 to 50%",
                                "50 to 70%", "more 70%")) +
  geom_point(position = "jitter", aes(color = ozone_damage_percentage), alpha = 0.5, size = 2.5) +
  xlab("") + ylab("Tree age (years)") +
  labs(color = "Ozone damage\n per tree") +
  theme_bw() +
  ggtitle("d)") +
  theme(legend.title.align = 0.5) +
  theme(text = element_text(size = 20), axis.text.x = element_blank()) +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold"))
p4_d
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing missing values (geom_point).
# Statistical tests
# One-way ANOVA on the square root of the node count by ozone damage class
a <- aov(
  sqrt(tree_nodes) ~ ozone_damage_percentage,
  data = condition_PO
)
# Summary of the analysis
summary(a)
## Df Sum Sq Mean Sq F value Pr(>F)
## ozone_damage_percentage 5 120.0 24.008 75.27 <2e-16 ***
## Residuals 960 306.2 0.319
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 147 observations deleted due to missingness
# We reject the null hypothesis of equal means: the mean number of nodes
# differs between categories.
# Normality: a p-value below 0.05 means the residuals are not normal.
# Extract the residuals
aov_residuals <- residuals(a)
# Run Shapiro-Wilk test
shapiro.test(aov_residuals)
##
## Shapiro-Wilk normality test
##
## data: aov_residuals
## W = 0.98, p-value = 3.002e-10
# Proceed with a Kruskal-Wallis test.
# Homogeneity is required: a p-value above 0.05 means there is no evidence
# against homogeneity of variances.
leveneTest(
  sqrt(tree_nodes) ~ ozone_damage_percentage,
  data = condition_PO,
  center = "median"
)
kruskal.test(
  sqrt(tree_nodes) ~ ozone_damage_percentage,
  data = condition_PO
)
##
## Kruskal-Wallis rank sum test
##
## data: sqrt(tree_nodes) by ozone_damage_percentage
## Kruskal-Wallis chi-squared = 283.95, df = 5, p-value < 2.2e-16
# Post hoc: which groups differ (Bonferroni-adjusted pairwise Wilcoxon tests)
pairwise.wilcox.test(
  x = sqrt(condition_PO$tree_nodes),
  g = condition_PO$ozone_damage_percentage,
  p.adjust.method = "bonferroni"
)
##
## Pairwise comparisons using Wilcoxon rank sum test with continuity correction
##
## data: sqrt(condition_PO$tree_nodes) and condition_PO$ozone_damage_percentage
##
## 0% 10 to 40% 40 to 50% 50 to 70% less than 10%
## 10 to 40% < 2e-16 - - - -
## 40 to 50% < 2e-16 1.0000 - - -
## 50 to 70% < 2e-16 0.0026 0.5273 - -
## less than 10% < 2e-16 0.6498 0.2086 2.9e-06 -
## more than 70% 1.7e-05 1.0000 1.0000 0.2060 1.0000
##
## P value adjustment method: bonferroni
# Combine the four panels on one two-column page
multiplot(plotlist = list(p4_a, p4_c, p4_b, p4_d), cols = 2)
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing missing values (geom_point).
## Warning: Removed 147 rows containing non-finite values (stat_count).
## Warning: Removed 147 rows containing non-finite values (stat_boxplot).
## Warning: Removed 147 rows containing missing values (geom_point).
# Filter by damage category: healthy trees and trees with ozone damage
condition_HOO <- muestreo_tidy %>%
  filter(tree_health_simplified %in% c("healthy", "ozone", "ozone_and_other"))
condition_HOO$tree_health_simplified <- as.factor(condition_HOO$tree_health_simplified)
# Model 3 - age, health and spatial structure affect growth
glm3 <- glm(log10(tree_heigth) ~ tree_nodes*tree_health_simplified + tree_exposition + reforested, data = condition_HOO)
# H0: the residuals are normal. A p-value above 0.05 means H0 is not
# rejected, i.e. the residuals are compatible with normality.
shapiro.test(glm3$residuals) # Normality (normal if the p-value is above 0.05)
##
## Shapiro-Wilk normality test
##
## data: glm3$residuals
## W = 0.99778, p-value = 0.2238
cor.test(abs(glm3$residuals), glm3$fitted.values) # Homoscedasticity: a non-significant value means there is homoscedasticity
##
## Pearson's product-moment correlation
##
## data: abs(glm3$residuals) and glm3$fitted.values
## t = 1.4722, df = 964, p-value = 0.1413
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.0157601 0.1101083
## sample estimates:
## cor
## 0.04736209
# Draw the model diagnostic plots on a 2 x 2 page, then restore the previous
# graphics settings so later base-graphics plots are not split into four
old_par <- par(mfrow = c(2, 2))
plot(glm3)
par(old_par)
summary(glm3)
##
## Call:
## glm(formula = log10(tree_heigth) ~ tree_nodes * tree_health_simplified +
## tree_exposition + reforested, data = condition_HOO)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.94431 -0.19691 0.01762 0.18800 0.84646
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) -0.694392 0.033429 -20.772
## tree_nodes 0.108988 0.004806 22.677
## tree_health_simplifiedozone 0.218104 0.096575 2.258
## tree_health_simplifiedozone_and_other 0.426346 0.053116 8.027
## tree_expositionexposed 0.069066 0.020785 3.323
## reforestedyes 0.032913 0.024310 1.354
## tree_nodes:tree_health_simplifiedozone -0.013930 0.010673 -1.305
## tree_nodes:tree_health_simplifiedozone_and_other -0.037654 0.006213 -6.060
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## tree_nodes < 2e-16 ***
## tree_health_simplifiedozone 0.024145 *
## tree_health_simplifiedozone_and_other 2.92e-15 ***
## tree_expositionexposed 0.000925 ***
## reforestedyes 0.176097
## tree_nodes:tree_health_simplifiedozone 0.192146
## tree_nodes:tree_health_simplifiedozone_and_other 1.95e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for gaussian family taken to be 0.08797878)
##
## Null deviance: 224.146 on 965 degrees of freedom
## Residual deviance: 84.284 on 958 degrees of freedom
## (147 observations deleted due to missingness)
## AIC: 403.34
##
## Number of Fisher Scoring iterations: 2
# Plot model 3: tree height against node count (used as tree age), with one
# fitted line per health status.
# - Height is plotted as log10(), the transformation used in glm3.
# - theme_bw() comes before the theme() tweaks: it is a complete theme, so
#   when it was added last it replaced the text-size setting made before it.
# - The x-axis text is left visible, as it was in the rendered figure,
#   because the axis carries the numeric age values.
Tree_height_plot <- ggplot(condition_HOO, aes(x = tree_nodes, y = log10(tree_heigth))) +
  geom_point(aes(colour = tree_health_simplified), alpha = 0.5, size = 2.5) +
  geom_smooth(method = "glm", aes(color = tree_health_simplified), fullrange = TRUE) +
  labs(y = "log10(Tree height)", x = "Tree age (years)", color = "Health status") +
  scale_color_manual(values = my_cols, breaks = desired_order,
                     labels = desired_names,
                     name = "Health status") +
  theme_bw() +
  theme(text = element_text(size = 20)) +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold"))
Tree_height_plot
## Warning: Removed 147 rows containing non-finite values (stat_smooth).
## Warning: Removed 147 rows containing missing values (geom_point).
# Draw one pie per sampled plot on the current basemap; slices are the
# tree counts in each health category
p_satmap <- ggmap(sat_map)
p_satmap +
  geom_scatterpie(
    data = parcelas_tidy,
    aes(
      x = X_coordinates_longitude,
      y = X_coordinates_latitude,
      group = plot
    ),
    pie_scale = 1.5,
    cols = desired_order,
    color = NA,
    alpha = 1
  ) +
  scale_fill_manual(
    values = my_cols,
    breaks = desired_order,
    labels = desired_names,
    name = "Health status"
  ) +
  theme(text = element_text(size = 20))
# Add the total number of trees per plot and the share of them with ozone
# damage (ozone alone or ozone plus another cause).
# The sums are plain column arithmetic, so rowwise() is not needed and the
# result is no longer left as a row-wise data frame.
parcelas_tidy <- parcelas_tidy %>%
  mutate(total = healthy + ozone + ozone_and_other +
           drougth + acid_rain + other +
           others_combined + dead + fungi +
           # insect +
           worm,
         # perc.ozone is a proportion between 0 and 1, not a percentage
         perc.ozone = (ozone + ozone_and_other) / total)
# plot
# (an earlier scatter plot assigned to `p` was overwritten immediately and
# has been dropped)
p <- ggplot(parcelas_tidy, aes(X_coordinates_altitude, perc.ozone)) +
  geom_point(color = "grey50", size = 3, alpha = 0.6)
p +
  stat_smooth(color = "skyblue", formula = y ~ x, fill = "skyblue", method = "lm") +
  stat_poly_eq(
    aes(label = paste(..eq.label.., ..adj.rr.label.., sep = '~~~~')),
    formula = y ~ x, parse = TRUE,
    size = 10, # font size of the equation label
    label.x = 0.1, # label position, as a proportion between 0 and 1
    label.y = 0.95) +
  # The y values are proportions, so the axis is labelled accordingly
  labs(x = "Plot altitude", y = "Proportion of ozone damaged trees") +
  theme_bw() +
  theme(plot.title = element_text(lineheight = 1.1, face = "bold")) +
  theme(text = element_text(size = 20))